"""
@Filename       : kg_loader.py
@Create Time    : 2022/02/25 10:08
@Author         : Rylynn
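@Description    : Builds uikg_dict, which maps vocab_dict[user] to the concatenated content_kg entries of every content whose cascade line lists that user.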

"""

import ast
import json

from util.preprocess import load_vocab_dict

# Name of the dataset sub-directory under ../../../data that this script reads.
dataset = 'memetracker'

# content.json is parsed into a dict keyed by content id. The context manager
# closes the file handle deterministically, and the explicit utf8 encoding
# matches how cascade.txt is opened (JSON text is UTF-8 by specification).
with open('../../../data/{}/content.json'.format(dataset), encoding='utf8') as content_file:
    content_kg = json.load(content_file)
# Keys view of content_kg; used for membership tests against cascade content ids.
content_id_set = content_kg.keys()

# Lookup table indexed by the parsed user token of each cascade chunk.
# NOTE(review): presumably maps raw user ids to vocabulary indices -- confirm
# against util.preprocess.load_vocab_dict.
vocab_dict = load_vocab_dict('../../../data', dataset)
# Filled while scanning cascade.txt: vocab_dict[user] -> list of content ids.
uikg_dict = {}

# total_num: cascade lines read; has_num: lines whose content id is in content_kg.
total_num = 0
has_num = 0

# Scan cascade.txt. Each non-blank line is whitespace-separated: the first
# token is a content id and every following token is a "user,timestamp" chunk.
# For lines whose content id exists in content_kg, record that content id under
# vocab_dict[user] for each chunk on the line (once per chunk, so a user that
# occurs several times on a line gets the content id several times).
with open('../../../data/{}/cascade.txt'.format(dataset), encoding='utf8') as f:
    # Iterate the file lazily instead of materialising every line up front.
    for line in f:
        total_num += 1  # counts every line read, blank ones included
        fields = line.split()  # split once; leading/trailing whitespace is ignored
        if not fields:
            continue
        content_id = fields[0]
        if content_id not in content_id_set:
            continue
        has_num += 1
        for chunk in fields[1:]:
            # Unpacking still enforces exactly one comma per chunk. The
            # timestamp is not used by this script, so it is no longer parsed.
            user, timestamp = chunk.split(',')
            # SECURITY: this used to be eval(user), which would execute any
            # expression found in the data file. literal_eval accepts only
            # Python literals (numbers, quoted strings, ...), which is all a
            # user-id token should ever be.
            user = ast.literal_eval(user)
            # A missing user still raises KeyError, as before.
            uikg_dict.setdefault(vocab_dict[user], []).append(content_id)

# Swap each user's list of content ids for the concatenation of the
# corresponding content_kg entries, preserving order and repeats.
# Only values of existing keys are reassigned, so the dict's size never
# changes while it is being iterated.
for uid in uikg_dict:
    uikg_dict[uid] = [
        entry
        for c_id in uikg_dict[uid]
        for entry in content_kg[c_id]
    ]


# for cid, kg in content_kg.items():
#     print(cid, kg)